# Load required libraries
suppressMessages(library(tidyverse))
library(rstanarm)
library(lme4)
options(mc.cores = parallel::detectCores() - 1)
source("Code/silent.R")

source_temp <- function(file) {
  # source script & throw everything 
  # into local env which dies after
  # this function terminates:
  local_env <- new.env()
  source(file, local = local_env)
  invisible(NULL)
}

set.seed(96) # For reproducibility

# Ensure full mortality data is available, generate if missing
if (!file.exists("Data/Saves/full_mortality_data_2015_2021_no_2019_second_semester_2020_first_semester.rds")) {
    message("Running preprocess_data_2.R to generate full mortality data...")
    silent(source_temp("Code/preprocess_data_2.R"))
}

# Load electoral turnover data from Stata file
data_electoral_turnover <- haven::read_dta("Data/Covid19_Incumbency_Dataset_workfile_25Nov24.dta") %>%
    as_tibble()

# Join mortality data with electoral turnover data
full_mortality_data <- readRDS("Data/Saves/full_mortality_data_2015_2021_no_2019_second_semester_2020_first_semester.rds") %>%
    left_join(data_electoral_turnover %>%
        rename(Municipality_code = municip))

# Load list of selected cities from Stata file
selected_cities <- haven::read_dta("Data/municip_main.dta") %>%
    pull(municip) %>%
    unique()

# Filter mortality data to selected cities
mortality_data_subset <- full_mortality_data %>%
    filter(Municipality_code %in% selected_cities)

# Fit a GLMM using lme4::glmer
fit <- glmer(
    formula = cbind(NbDeath, Population - NbDeath) ~ (1 + covid | Municipality_code) + covid:turnover + log_MaleToFemaleRatio + log_shareImmigrants + log_shareBlueCollar + log_PopDensity + log_sq_PopDensity + log80 + log3 + log_MedianStandardLiving10k + log_basemort_vb_remain + log_shareUnemp + delta_Immigrants + delta_Unemp + delta_logStandardLiving + log_nl2019 + delta_log_nl + ncandidates,
    data = mortality_data_subset %>% mutate_at(vars(log_MaleToFemaleRatio, log_shareImmigrants, log_shareBlueCollar, log_PopDensity, log_sq_PopDensity, log65, log75, log80, log6575, log6580, log7580, log3, log_MedianStandardLiving10k, log_basemort_vb_remain, log_shareUnemp, delta_Immigrants, delta_Unemp, delta_logStandardLiving, log_nl2019, delta_log_nl, ncandidates), scales::rescale),
    family = "binomial"
)

# Fit the model using multiple optimizers for robustness
tt <- allFit(fit, parallel = "multicore", ncpus = parallel::detectCores() - 1, verbose = FALSE)

# Compute mean relative mortality increase using the lme4 fit with the best optimizer (bobyqa, selected by the largest log-likelihood value)
mean_relative_mortality_increase_lme4 <- tt$bobyqa %>%
    (function(fit) {
        # Prepare data for prediction: only turnover==1 and covid==1
        dd <- mortality_data_subset %>%
            mutate_at(vars(log_MaleToFemaleRatio, log_shareImmigrants, log_shareBlueCollar, log_PopDensity, log_sq_PopDensity, log65, log75, log80, log6575, log6580, log7580, log3, log_MedianStandardLiving10k, log_basemort_vb_remain, log_shareUnemp, delta_Immigrants, delta_Unemp, delta_logStandardLiving, log_nl2019, delta_log_nl, ncandidates), scales::rescale) %>%
            filter(turnover == 1, covid == 1)

        # Predict mortality rates for actual and hypothetical (no turnover) scenarios
        increased_mortality_rate <- predict(fit, type = "response", newdata = dd, re.form = ~0)
        hypothetical_mortality_rate <- predict(fit, type = "response", newdata = dd %>% mutate(turnover = 0), re.form = ~0)
        # Calculate relative increase
        relative_mortality_increase <- (increased_mortality_rate - hypothetical_mortality_rate) / hypothetical_mortality_rate
        return(mean(relative_mortality_increase))
    })

# Fit a Bayesian GLMM using rstanarm::stan_glmer
fit_stan <- stan_glmer(
    formula = cbind(NbDeath, Population - NbDeath) ~ (1 + covid | Municipality_code) + covid:turnover + log_PopDensity + log80 + log_shareUnemp + log_nl2019 + ncandidates,
    data = mortality_data_subset %>% mutate_at(vars(log_MaleToFemaleRatio, log_shareImmigrants, log_shareBlueCollar, log_PopDensity, log80, log_MedianStandardLiving10k, log_basemort_vb_remain, log_shareUnemp, delta_Immigrants, delta_logStandardLiving, log_nl2019, delta_log_nl, ncandidates), scales::rescale),
    family = "binomial",
    algorithm = "meanfield",
    iter = 100000
)

# Compute mean relative mortality increase using the Bayesian fit
mean_relative_mortality_increase_stan <-
    fit_stan %>%
    (function(fit) {
        # Prepare data for prediction: only turnover==1 and covid==1
        dd <- mortality_data_subset %>%
            mutate_at(vars(log_MaleToFemaleRatio, log_shareImmigrants, log_shareBlueCollar, log_PopDensity, log_sq_PopDensity, log65, log75, log80, log6575, log6580, log7580, log3, log_MedianStandardLiving10k, log_basemort_vb_remain, log_shareUnemp, delta_Immigrants, delta_Unemp, delta_logStandardLiving, log_nl2019, delta_log_nl, ncandidates), scales::rescale) %>%
            filter(turnover == 1, covid == 1)

        # Predict mortality rates for actual and hypothetical (no turnover) scenarios using posterior draws
        increased_mortality_rate <- posterior_epred(fit, newdata = dd) %>% mean()
        hypothetical_mortality_rate <- posterior_epred(fit, newdata = dd %>% mutate(turnover = 0)) %>% mean()
        # Calculate relative increase
        relative_mortality_increase <- (increased_mortality_rate - hypothetical_mortality_rate) / hypothetical_mortality_rate
        return(mean(relative_mortality_increase))
    })


if (!dir.exists("Results")) dir.create("Results")

# Save the results to a CSV file
results <- tibble(
    mean_relative_mortality_increase_lme4 = mean_relative_mortality_increase_lme4,
    mean_relative_mortality_increase_stan = mean_relative_mortality_increase_stan
)
write_csv(results, "Results/mean_relative_mortality_increase.csv")
